#delimit;
/* From this point on, commands end at a semicolon instead of at end of line. */
clear;
set more off;

/* Path macros. $path (the home directory) is used in file names further down.
   $temp is not referenced anywhere in the visible part of this script. */
global temp /Sastemp;
global path ~;
/* Requests 5000 MB of memory for the session. */
set mem 5000m;
*set maxvar 10000;

/*================================================
 Program: wqregs.do
 Author:  Avi Ebenstein
 Created: August 2008
 Purpose: OLS regressions of water quality on dumping by basin
=================================================*/
  
/* Pollutant columns that are summed to basin totals in the loop below. */
global emissiontypes "total_dumping ammonium suspended mercury_amt cadmium chromium lead_amt arsenic volitized_phenol cyanide petroleum_types chemical_oxygen";
*************************************************;
* Aggregate emissions by river basin (levels 1-6);
*************************************************;

use ~/research/pollution/datafiles/county_dumping, clear;
/* Give the lead and mercury dumping columns an _amt suffix. The water-quality
   outcomes used later in this script are built from the stems lead and mercury,
   so presumably this keeps the two sets of names apart (confirm). */
rename lead lead_amt;
rename mercury mercury_amt;

/* For each basin level 1 to 6: collapse the county data to one row per basin
   holding the sum of every pollutant, write it to emissions_level<j>.dta, and
   then restore the county-level data for the next pass. */
forvalues j=1/6{;
                preserve;
                /* (sum) applies to every variable that follows it, so it is stated once. */
                collapse (sum) $emissiontypes
                         tot_dumping_cnty=total_dumping_cnty
                         tot_dumping_cntynat=total_dumping_cntynat, by(level`j');
                save ~/research/pollution/datafiles/emissions_level`j', replace;
                count;
                restore;
              };

/* Build a level6 -> province lookup from the county dumping file.
   NOTE(review): nothing here removes repeated level6 values, so the lookup may
   hold several rows per basin - confirm that is intended for the merge below. */
use province level6 using ~/research/pollution/datafiles/county_dumping, clear;
sort level6;
save ~/pollution/datafiles/provinces, replace;

/* Attach province to the hydro1k basin file, keeping every hydro1k row and
   discarding lookup rows that have no hydro1k counterpart. */
use ~/pollution/datafiles/hydro1k_pfaf, clear;
sort level6;
merge level6 using ~/pollution/datafiles/provinces;
tabulate _merge;
drop if _merge==2;
drop _merge;

/* For basin levels 4 to 6: reduce the data in memory to one row per basin,
   attach water quality, rainfall, output and emissions, and save the result
   as temp<j>.dta. The data in memory is restored after each pass. */
forvalues j=4/6{;
                preserve;
                disp `j';

                /* One row per basin: the first row after ordering by descending area. */
                gsort level`j' -area;
                by level`j': keep if _n==1;

                /* Water quality. Basins without water quality data are dropped,
                   so only matched rows are kept. */
                sort level`j';
                merge level`j' using ~/pollution/datafiles/water_pollution_level`j';
                tab _merge;
                keep if _merge==3;
                drop _merge;

                /* Rainfall and output. Every basin already in memory is kept;
                   rows that exist only in the merged-in file are discarded. */
                foreach src in rainfall output{;
                                sort level`j';
                                merge level`j' using ~/pollution/datafiles/`src'_level`j';
                                tab _merge;
                                keep if _merge==1|_merge==3;
                                drop _merge;
                              };

                /* Emissions. Read from the directory this script writes the
                   emissions_level files to, so the freshly built totals are used. */
                sort level`j';
                merge level`j' using ~/research/pollution/datafiles/emissions_level`j';
                tab _merge;
                keep if _merge==1|_merge==3;
                drop _merge;

                save $path/research/pollution/datafiles/temp`j',replace;
                count;
                restore;
              };
clear;

/* Variable lists used in every pass of the loop below: the pollutant columns,
   and the stems of the water-quality outcomes (each stem is followed by
   _level<j> in the data). */
global emissiontypes "total_dumping ammonium mercury_amt cadmium chromium lead_amt arsenic volitized_phenol cyanide petroleum_types chemical_oxygen suspended";
global mylist "overall_q a_n bod dissolved_ lead mercury oils permanga_n volatile_p";

/* For basin levels 4 to 6: load temp<j>.dta, add the log and the sample mean
   of every pollutant plus the sample mean of every water-quality outcome,
   and save the result as temp<j>_2.dta. */
forvalues j=4/6{;
                use $path/research/pollution/datafiles/temp`j';

                foreach pol of global emissiontypes{;
                                gen ln_`pol'=ln(`pol');
                                /* Author's caveat: turning zeros into missing is questionable,
                                   but acceptable because the regressions are run in logs.
                                   The alternative of setting the log to 0 for zero emissions
                                   was considered and left disabled. */
                                replace `pol'=. if `pol'==0;
                                /* Mean over the remaining (non-zero, non-missing) values. */
                                egen m_`pol'=mean(`pol');
                              };

                foreach wq of global mylist{;
                                egen my_`wq'=mean(`wq'_level`j');
                              };

                save $path/research/pollution/datafiles/temp`j'_2, replace;
              };

/* Right-hand-side controls and estimation options appended to every regression:
   rainfall and area as covariates, robust standard errors clustered by province. */
global controls "rainfall area,robust cluster(province)";

/* Regress each water-quality outcome on the log of the matching pollutant and
   export all columns to wqregs.out. Only basin level 4 is currently run.
   The first outreg2 call replaces the output file and the rest append to it.
   NOTE(review): the m_ and my_ means reported through addstat come from the
   saved file and are not recomputed after the sample restrictions below. */
forvalues j=4/4{;
                use $path/research/pollution/datafiles/temp`j'_2,clear;

                /* Area is in 100 billions */
                replace area=area/100000000000;
                /* There are two very large river basins */
                drop if level`j'<0;
                drop if area>1;

                reg overall_q_level`j' ln_total_dumping $controls;
                outreg2 using ~/pollution/outfiles/wqregs.out,replace  se bdec(3) tdec(3) addstat(Average value, m_total_dumping, average grade, my_overall_q) adec(0,2);

                reg a_n_level`j' ln_ammonium $controls;
                outreg2 using ~/pollution/outfiles/wqregs.out,append  se bdec(3) tdec(3) addstat(Average value, m_ammonium, average grade, my_a_n) adec(0,2);

                reg bod_level`j' ln_chemical_oxygen $controls;
                outreg2 using ~/pollution/outfiles/wqregs.out,append  se bdec(3) tdec(3) addstat(Average value, m_chemical_oxygen, average grade, my_bod) adec(0,2);

                reg dissolved__level`j' ln_chemical_oxygen $controls;
                outreg2 using ~/pollution/outfiles/wqregs.out,append  se bdec(3) tdec(3) addstat(Average value, m_chemical_oxygen, average grade, my_dissolved_) adec(0,2);

                /* Full variable names are spelled out for the lead, mercury and
                   permanganate columns so they do not depend on variable abbreviation. */
                reg lead_level`j' ln_lead_amt $controls;
                outreg2 using ~/pollution/outfiles/wqregs.out,append  se bdec(3) tdec(3) addstat(Average value, m_lead_amt, average grade, my_lead) adec(2,2);

                reg mercury_level`j' ln_mercury_amt $controls;
                outreg2 using ~/pollution/outfiles/wqregs.out,append  se bdec(3) tdec(3) addstat(Average value, m_mercury_amt, average grade, my_mercury) adec(2,2);

                reg oils_level`j' ln_petroleum_types $controls;
                outreg2 using ~/pollution/outfiles/wqregs.out,append  se bdec(3) tdec(3) addstat(Average value, m_petroleum_types, average grade, my_oils) adec(1,2);

                reg permanga_n_level`j' ln_suspended $controls;
                outreg2 using ~/pollution/outfiles/wqregs.out,append  se bdec(3) tdec(3) addstat(Average value, m_suspended, average grade, my_permanga_n) adec(0,2);

                reg volatile_p_level`j' ln_volitized_phenol $controls;
                outreg2 using ~/pollution/outfiles/wqregs.out,append  se bdec(3) tdec(3) addstat(Average value, m_volitized_phenol, average grade, my_volatile_p) adec(2,2);

              };

/* Echo the finished table to the log, then stop the do-file. */
type ~/pollution/outfiles/wqregs.out;

exit;


